library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# Read the JHU CSSE daily report for 2020-06-13, keep the US rows and total
# the confirmed counts within each Province_State.
Confirmed_State_6_13 <- read_csv(
  url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/06-13-2020.csv")
) %>%
  filter(Country_Region == "US") %>%
  group_by(Province_State, Country_Region) %>%
  # summarise() peels off only the last grouping variable, so the result is
  # still grouped by Province_State.
  summarise(Confirmed = sum(Confirmed))
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` regrouping output by 'Province_State' (override with `.groups` argument)
# Inspect the structure (columns, types, grouping attributes) of the summary.
Confirmed_State_6_13 %>% str()
## tibble [58 × 3] (S3: grouped_df/tbl_df/tbl/data.frame)
##  $ Province_State: chr [1:58] "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ Country_Region: chr [1:58] "US" "US" "US" "US" ...
##  $ Confirmed     : num [1:58] 24601 653 34660 12095 150018 ...
##  - attr(*, "groups")= tibble [58 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Province_State: chr [1:58] "Alabama" "Alaska" "Arizona" "Arkansas" ...
##   ..$ .rows         : list<int> [1:58] 
##   .. ..$ : int 1
##   .. ..$ : int 2
##   .. ..$ : int 3
##   .. ..$ : int 4
##   .. ..$ : int 5
##   .. ..$ : int 6
##   .. ..$ : int 7
##   .. ..$ : int 8
##   .. ..$ : int 9
##   .. ..$ : int 10
##   .. ..$ : int 11
##   .. ..$ : int 12
##   .. ..$ : int 13
##   .. ..$ : int 14
##   .. ..$ : int 15
##   .. ..$ : int 16
##   .. ..$ : int 17
##   .. ..$ : int 18
##   .. ..$ : int 19
##   .. ..$ : int 20
##   .. ..$ : int 21
##   .. ..$ : int 22
##   .. ..$ : int 23
##   .. ..$ : int 24
##   .. ..$ : int 25
##   .. ..$ : int 26
##   .. ..$ : int 27
##   .. ..$ : int 28
##   .. ..$ : int 29
##   .. ..$ : int 30
##   .. ..$ : int 31
##   .. ..$ : int 32
##   .. ..$ : int 33
##   .. ..$ : int 34
##   .. ..$ : int 35
##   .. ..$ : int 36
##   .. ..$ : int 37
##   .. ..$ : int 38
##   .. ..$ : int 39
##   .. ..$ : int 40
##   .. ..$ : int 41
##   .. ..$ : int 42
##   .. ..$ : int 43
##   .. ..$ : int 44
##   .. ..$ : int 45
##   .. ..$ : int 46
##   .. ..$ : int 47
##   .. ..$ : int 48
##   .. ..$ : int 49
##   .. ..$ : int 50
##   .. ..$ : int 51
##   .. ..$ : int 52
##   .. ..$ : int 53
##   .. ..$ : int 54
##   .. ..$ : int 55
##   .. ..$ : int 56
##   .. ..$ : int 57
##   .. ..$ : int 58
##   .. ..@ ptype: int(0) 
##   ..- attr(*, ".drop")= logi TRUE
# Same aggregation as for 06-13, applied to the 2020-09-13 daily report:
# US rows only, confirmed counts totalled per Province_State.
Confirmed_State_9_13 <- read_csv(
  url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-13-2020.csv")
) %>%
  filter(Country_Region == "US") %>%
  group_by(Province_State, Country_Region) %>%
  # The result remains grouped by Province_State (only the last level is dropped).
  summarise(Confirmed = sum(Confirmed))
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` regrouping output by 'Province_State' (override with `.groups` argument)
# States present in the 9/13 snapshot but absent from the 6/13 snapshot.
# (One direction only: states that exist only in 6/13 are not reported here.)
setdiff(
  Confirmed_State_9_13[["Province_State"]],
  Confirmed_State_6_13[["Province_State"]]
)
## character(0)
# Remove the rows whose Province_State is "Recovered" from both snapshots.
Confirmed_State_6_13 <- filter(Confirmed_State_6_13, Province_State != "Recovered")
Confirmed_State_9_13 <- filter(Confirmed_State_9_13, Province_State != "Recovered")

# Full join keeps every state found in either snapshot; the remaining shared
# columns receive the .x (6/13) and .y (9/13) suffixes.
Confirmed_State_6_13_9_13_joined <- Confirmed_State_6_13 %>%
  full_join(Confirmed_State_9_13, by = "Province_State")

head(Confirmed_State_6_13_9_13_joined, n = 6L)
## # A tibble: 6 x 5
## # Groups:   Province_State [6]
##   Province_State Country_Region.x Confirmed.x Country_Region.y Confirmed.y
##   <chr>          <chr>                  <dbl> <chr>                  <dbl>
## 1 Alabama        US                     24601 US                    138755
## 2 Alaska         US                       653 US                      6268
## 3 Arizona        US                     34660 US                    208512
## 4 Arkansas       US                     12095 US                     70219
## 5 California     US                    150018 US                    761728
## 6 Colorado       US                     29002 US                     61293
# Show the last rows of the joined table.
tail(Confirmed_State_6_13_9_13_joined, n = 6L)
## # A tibble: 6 x 5
## # Groups:   Province_State [6]
##   Province_State Country_Region.x Confirmed.x Country_Region.y Confirmed.y
##   <chr>          <chr>                  <dbl> <chr>                  <dbl>
## 1 Virgin Islands US                        72 US                      1220
## 2 Virginia       US                     53869 US                    133742
## 3 Washington     US                     25538 US                     79826
## 4 West Virginia  US                      2274 US                     12705
## 5 Wisconsin      US                     22518 US                     89185
## 6 Wyoming        US                      1050 US                      4346
# Positions of missing cells in the joined table (empty result = no NA).
Confirmed_State_6_13_9_13_joined %>%
  is.na() %>%
  which()
## integer(0)
# Rebuild the joined table with descriptive count columns: join the two
# snapshots, label each Confirmed column with its date, drop the duplicated
# Country_Region columns and turn a missing 6/13 count into 0.
Confirmed_State_6_13_9_13_joined <- Confirmed_State_6_13 %>%
  full_join(Confirmed_State_9_13, by = "Province_State") %>%
  rename(
    Confirmed_6_13_2020 = Confirmed.x,
    Confirmed_9_13_2020 = Confirmed.y
  ) %>%
  select(-c(Country_Region.x, Country_Region.y)) %>%
  replace_na(list(Confirmed_6_13_2020 = 0))
head(Confirmed_State_6_13_9_13_joined, n = 6L)
## # A tibble: 6 x 3
## # Groups:   Province_State [6]
##   Province_State Confirmed_6_13_2020 Confirmed_9_13_2020
##   <chr>                        <dbl>               <dbl>
## 1 Alabama                      24601              138755
## 2 Alaska                         653                6268
## 3 Arizona                      34660              208512
## 4 Arkansas                     12095               70219
## 5 California                  150018              761728
## 6 Colorado                     29002               61293
# Re-check for missing cells after the rename / replace_na step.
Confirmed_State_6_13_9_13_joined %>%
  is.na() %>%
  which()
## integer(0)
# Reshape to long form: one row per state and snapshot, with the former column
# name stored in Date and its value in Confirmed.
Confirmed_State_6_13_9_13_joined_long <- pivot_longer(
  Confirmed_State_6_13_9_13_joined,
  cols = -Province_State,
  names_to = "Date",
  values_to = "Confirmed"
)
head(Confirmed_State_6_13_9_13_joined_long, n = 6L)
## # A tibble: 6 x 3
## # Groups:   Province_State [3]
##   Province_State Date                Confirmed
##   <chr>          <chr>                   <dbl>
## 1 Alabama        Confirmed_6_13_2020     24601
## 2 Alabama        Confirmed_9_13_2020    138755
## 3 Alaska         Confirmed_6_13_2020       653
## 4 Alaska         Confirmed_9_13_2020      6268
## 5 Arizona        Confirmed_6_13_2020     34660
## 6 Arizona        Confirmed_9_13_2020    208512
# Dot plot: confirmed count (x) for every state (y), coloured by snapshot.
ggplot(
  Confirmed_State_6_13_9_13_joined_long,
  aes(x = Confirmed, y = Province_State)
) +
  geom_point(aes(colour = Date)) +
  theme(
    axis.text.x = element_text(colour = "grey20", size = 12, angle = 90, hjust = 0.5, vjust = 0.5),
    axis.text.y = element_text(colour = "grey20", size = 6),
    strip.text = element_text(face = "italic"),
    text = element_text(size = 16)
  )

Question 2

# Long table of confirmed counts, explicitly grouped by state for plotting.
Confirmed_in_US_States_joined_long <- Confirmed_State_6_13_9_13_joined_long %>%
  group_by(Province_State)

# Bar chart of confirmed cases per state with one bar per snapshot date.
ggplot(
  data = Confirmed_in_US_States_joined_long,
  aes(x = Province_State, y = Confirmed)
) +
  # `fill` colours the whole bar; `color` would only tint the outline and
  # leave the two dates indistinguishable. Dodging puts the two dates side by
  # side instead of stacking (i.e. adding) their counts.
  geom_col(aes(fill = Date), position = "dodge") +
  theme(
    axis.text.x = element_text(colour = "grey20", size = 20, angle = 90, hjust = 0.75, vjust = 0.75),
    axis.text.y = element_text(colour = "grey20", size = 20),
    strip.text = element_text(face = "italic"),
    text = element_text(size = 16),
    axis.title.x = element_text(size = 30),
    axis.title.y = element_text(size = 30),
    plot.title = element_text(size = 35)
  ) +
  labs(title = "Bar plot of US States vs Confirmed Cases") +
  xlab("US states") +
  ylab("Confirmed Cases")

# Load the global confirmed-cases time series (one column per date) and give
# the two slash-named identifier columns syntactic names.
time_series_confirmed <- read_csv(
  url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv")
) %>%
  rename(
    Province_State = `Province/State`,
    Country_Region = `Country/Region`
  )
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Preview the wide confirmed-cases table.
head(time_series_confirmed, n = 6L)
## # A tibble: 6 x 249
##   Province_State Country_Region   Lat   Long `1/22/20` `1/23/20` `1/24/20`
##   <chr>          <chr>          <dbl>  <dbl>     <dbl>     <dbl>     <dbl>
## 1 <NA>           Afghanistan     33.9  67.7          0         0         0
## 2 <NA>           Albania         41.2  20.2          0         0         0
## 3 <NA>           Algeria         28.0   1.66         0         0         0
## 4 <NA>           Andorra         42.5   1.52         0         0         0
## 5 <NA>           Angola         -11.2  17.9          0         0         0
## 6 <NA>           Antigua and B…  17.1 -61.8          0         0         0
## # … with 242 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>, `1/27/20` <dbl>,
## #   `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>, `1/31/20` <dbl>,
## #   `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>, `2/4/20` <dbl>,
## #   `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>,
## #   `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>, `2/12/20` <dbl>,
## #   `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>, `2/16/20` <dbl>,
## #   `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>,
## #   `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>, `2/24/20` <dbl>,
## #   `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>, `2/28/20` <dbl>,
## #   `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>, `3/3/20` <dbl>,
## #   `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>, `3/7/20` <dbl>,
## #   `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>, `3/11/20` <dbl>,
## #   `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>, `3/15/20` <dbl>,
## #   `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>, `3/19/20` <dbl>,
## #   `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>, `3/23/20` <dbl>,
## #   `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>, `3/27/20` <dbl>,
## #   `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>, `3/31/20` <dbl>,
## #   `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>, `4/4/20` <dbl>,
## #   `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>, `4/8/20` <dbl>,
## #   `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>, `4/12/20` <dbl>,
## #   `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>, `4/16/20` <dbl>,
## #   `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>, `4/20/20` <dbl>,
## #   `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>, `4/24/20` <dbl>,
## #   `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>, `4/28/20` <dbl>,
## #   `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>, `5/2/20` <dbl>,
## #   `5/3/20` <dbl>, …
# Wide -> long: every date column becomes a (Date, Confirmed) row; the
# location columns are kept as identifiers.
time_series_confirmed_long <- pivot_longer(
  time_series_confirmed,
  cols = -c(Province_State, Country_Region, Lat, Long),
  names_to = "Date",
  values_to = "Confirmed"
)
head(time_series_confirmed_long, n = 6L)
## # A tibble: 6 x 6
##   Province_State Country_Region   Lat  Long Date    Confirmed
##   <chr>          <chr>          <dbl> <dbl> <chr>       <dbl>
## 1 <NA>           Afghanistan     33.9  67.7 1/22/20         0
## 2 <NA>           Afghanistan     33.9  67.7 1/23/20         0
## 3 <NA>           Afghanistan     33.9  67.7 1/24/20         0
## 4 <NA>           Afghanistan     33.9  67.7 1/25/20         0
## 5 <NA>           Afghanistan     33.9  67.7 1/26/20         0
## 6 <NA>           Afghanistan     33.9  67.7 1/27/20         0
 # Make sure the target folder exists first: download.file() does not create
# missing directories and errors out when "data/" is absent.
dir.create("data", showWarnings = FALSE, recursive = TRUE)

# Save a local copy of the global deaths time series.
download.file(
  url = "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv",
  destfile = "data/time_series_covid19_deaths_global.csv"
)

# Read the local copy and give the two slash-named identifier columns
# syntactic names, matching the confirmed-cases table.
time_series_deaths <- read_csv("data/time_series_covid19_deaths_global.csv") %>%
  rename(Province_State = "Province/State", Country_Region = "Country/Region")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   `Province/State` = col_character(),
##   `Country/Region` = col_character()
## )
## See spec(...) for full column specifications.
# Preview the wide deaths table.
head(time_series_deaths, n = 6L)
## # A tibble: 6 x 249
##   Province_State Country_Region   Lat   Long `1/22/20` `1/23/20` `1/24/20`
##   <chr>          <chr>          <dbl>  <dbl>     <dbl>     <dbl>     <dbl>
## 1 <NA>           Afghanistan     33.9  67.7          0         0         0
## 2 <NA>           Albania         41.2  20.2          0         0         0
## 3 <NA>           Algeria         28.0   1.66         0         0         0
## 4 <NA>           Andorra         42.5   1.52         0         0         0
## 5 <NA>           Angola         -11.2  17.9          0         0         0
## 6 <NA>           Antigua and B…  17.1 -61.8          0         0         0
## # … with 242 more variables: `1/25/20` <dbl>, `1/26/20` <dbl>, `1/27/20` <dbl>,
## #   `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>, `1/31/20` <dbl>,
## #   `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>, `2/4/20` <dbl>,
## #   `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>,
## #   `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>, `2/12/20` <dbl>,
## #   `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>, `2/16/20` <dbl>,
## #   `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>,
## #   `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>, `2/24/20` <dbl>,
## #   `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>, `2/28/20` <dbl>,
## #   `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>, `3/3/20` <dbl>,
## #   `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>, `3/7/20` <dbl>,
## #   `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>, `3/11/20` <dbl>,
## #   `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>, `3/15/20` <dbl>,
## #   `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>, `3/19/20` <dbl>,
## #   `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>, `3/23/20` <dbl>,
## #   `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>, `3/27/20` <dbl>,
## #   `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>, `3/31/20` <dbl>,
## #   `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>, `4/4/20` <dbl>,
## #   `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>, `4/8/20` <dbl>,
## #   `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>, `4/12/20` <dbl>,
## #   `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>, `4/16/20` <dbl>,
## #   `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>, `4/20/20` <dbl>,
## #   `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>, `4/24/20` <dbl>,
## #   `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>, `4/28/20` <dbl>,
## #   `4/29/20` <dbl>, `4/30/20` <dbl>, `5/1/20` <dbl>, `5/2/20` <dbl>,
## #   `5/3/20` <dbl>, …
# Wide -> long for deaths: every date column becomes a (Date, Deaths) row.
time_series_deaths_long <- pivot_longer(
  time_series_deaths,
  cols = -c(Province_State, Country_Region, Lat, Long),
  names_to = "Date",
  values_to = "Deaths"
)

head(time_series_deaths_long, n = 6L)
## # A tibble: 6 x 6
##   Province_State Country_Region   Lat  Long Date    Deaths
##   <chr>          <chr>          <dbl> <dbl> <chr>    <dbl>
## 1 <NA>           Afghanistan     33.9  67.7 1/22/20      0
## 2 <NA>           Afghanistan     33.9  67.7 1/23/20      0
## 3 <NA>           Afghanistan     33.9  67.7 1/24/20      0
## 4 <NA>           Afghanistan     33.9  67.7 1/25/20      0
## 5 <NA>           Afghanistan     33.9  67.7 1/26/20      0
## 6 <NA>           Afghanistan     33.9  67.7 1/27/20      0
# Build a join key "<Province_State>.<Country_Region>.<Date>" while keeping
# the three source columns (remove = FALSE).
time_series_confirmed_long <- unite(
  time_series_confirmed_long,
  col = "Key",
  Province_State, Country_Region, Date,
  sep = ".",
  remove = FALSE
)
head(time_series_confirmed_long, n = 6L)
## # A tibble: 6 x 7
##   Key                 Province_State Country_Region   Lat  Long Date   Confirmed
##   <chr>               <chr>          <chr>          <dbl> <dbl> <chr>      <dbl>
## 1 NA.Afghanistan.1/2… <NA>           Afghanistan     33.9  67.7 1/22/…         0
## 2 NA.Afghanistan.1/2… <NA>           Afghanistan     33.9  67.7 1/23/…         0
## 3 NA.Afghanistan.1/2… <NA>           Afghanistan     33.9  67.7 1/24/…         0
## 4 NA.Afghanistan.1/2… <NA>           Afghanistan     33.9  67.7 1/25/…         0
## 5 NA.Afghanistan.1/2… <NA>           Afghanistan     33.9  67.7 1/26/…         0
## 6 NA.Afghanistan.1/2… <NA>           Afghanistan     33.9  67.7 1/27/…         0
# Build the same "<Province_State>.<Country_Region>.<Date>" key for deaths and
# keep only the key plus the Deaths count, so the join adds a single column.
time_series_deaths_long <- time_series_deaths_long %>%
  unite(col = "Key", Province_State, Country_Region, Date, sep = ".") %>%
  select(c(Key, Deaths))
head(time_series_deaths_long, n = 6L)
## # A tibble: 6 x 2
##   Key                    Deaths
##   <chr>                   <dbl>
## 1 NA.Afghanistan.1/22/20      0
## 2 NA.Afghanistan.1/23/20      0
## 3 NA.Afghanistan.1/24/20      0
## 4 NA.Afghanistan.1/25/20      0
## 5 NA.Afghanistan.1/26/20      0
## 6 NA.Afghanistan.1/27/20      0
# Attach Deaths to the confirmed table via the shared Key, then discard the
# helper key column.
time_series_long_joined <- time_series_confirmed_long %>%
  full_join(time_series_deaths_long, by = "Key") %>%
  select(-Key)

print(time_series_long_joined)
## # A tibble: 65,170 x 7
##    Province_State Country_Region   Lat  Long Date    Confirmed Deaths
##    <chr>          <chr>          <dbl> <dbl> <chr>       <dbl>  <dbl>
##  1 <NA>           Afghanistan     33.9  67.7 1/22/20         0      0
##  2 <NA>           Afghanistan     33.9  67.7 1/23/20         0      0
##  3 <NA>           Afghanistan     33.9  67.7 1/24/20         0      0
##  4 <NA>           Afghanistan     33.9  67.7 1/25/20         0      0
##  5 <NA>           Afghanistan     33.9  67.7 1/26/20         0      0
##  6 <NA>           Afghanistan     33.9  67.7 1/27/20         0      0
##  7 <NA>           Afghanistan     33.9  67.7 1/28/20         0      0
##  8 <NA>           Afghanistan     33.9  67.7 1/29/20         0      0
##  9 <NA>           Afghanistan     33.9  67.7 1/30/20         0      0
## 10 <NA>           Afghanistan     33.9  67.7 1/31/20         0      0
## # … with 65,160 more rows
# Row positions with a missing Confirmed value (empty = none unmatched).
which(is.na(time_series_long_joined[["Confirmed"]]))
## integer(0)
# Row positions with a missing Deaths value (empty = none unmatched).
which(is.na(time_series_long_joined[["Deaths"]]))
## integer(0)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Parse the month/day/year strings into Date values.
time_series_long_joined <- time_series_long_joined %>%
  mutate(Date = mdy(Date))

# Stack Confirmed and Deaths into a Report_Type / Counts pair of columns.
time_series_long_joined_counts <- pivot_longer(
  time_series_long_joined,
  cols = -c(Province_State, Country_Region, Lat, Long, Date),
  names_to = "Report_Type",
  values_to = "Counts"
)

head(time_series_long_joined_counts, n = 6L)
## # A tibble: 6 x 7
##   Province_State Country_Region   Lat  Long Date       Report_Type Counts
##   <chr>          <chr>          <dbl> <dbl> <date>     <chr>        <dbl>
## 1 <NA>           Afghanistan     33.9  67.7 2020-01-22 Confirmed        0
## 2 <NA>           Afghanistan     33.9  67.7 2020-01-22 Deaths           0
## 3 <NA>           Afghanistan     33.9  67.7 2020-01-23 Confirmed        0
## 4 <NA>           Afghanistan     33.9  67.7 2020-01-23 Deaths           0
## 5 <NA>           Afghanistan     33.9  67.7 2020-01-24 Confirmed        0
## 6 <NA>           Afghanistan     33.9  67.7 2020-01-24 Deaths           0
# Preview the Report_Type / Counts table.
head(time_series_long_joined_counts, n = 6L)
## # A tibble: 6 x 7
##   Province_State Country_Region   Lat  Long Date       Report_Type Counts
##   <chr>          <chr>          <dbl> <dbl> <date>     <chr>        <dbl>
## 1 <NA>           Afghanistan     33.9  67.7 2020-01-22 Confirmed        0
## 2 <NA>           Afghanistan     33.9  67.7 2020-01-22 Deaths           0
## 3 <NA>           Afghanistan     33.9  67.7 2020-01-23 Confirmed        0
## 4 <NA>           Afghanistan     33.9  67.7 2020-01-23 Deaths           0
## 5 <NA>           Afghanistan     33.9  67.7 2020-01-24 Confirmed        0
## 6 <NA>           Afghanistan     33.9  67.7 2020-01-24 Deaths           0

Question 4

# Worldwide deaths by date.
# The rows are collapsed to one total per Date before plotting: without the
# summarise() step the data still holds one row per region and date, so
# geom_line() would zigzag through every region's value instead of tracing a
# single worldwide series. Bar heights are unchanged (stacked bars already
# added up to the same total).
time_series_long_joined_counts %>%
  filter(Report_Type == "Deaths") %>%
  group_by(Date) %>%
  summarise(Counts = sum(Counts), .groups = "drop") %>%
  ggplot(aes(x = Date, y = Counts)) +
  geom_col() +
  geom_line() +
  ggtitle("Worldwide Confirmed Deaths per day") +
  xlab("Per Day") +
  ylab("Totals Confirmed Deaths")

# Deaths over time for six selected countries, one facet per country.
# Filtering first avoids aggregating countries that are discarded anyway;
# across() replaces the superseded summarise_at(), and .groups = "drop"
# returns an ungrouped table.
time_series_long_joined %>%
  filter(Country_Region %in% c("China", "Japan", "Korea, South",
                               "Italy", "Spain", "US")) %>%
  group_by(Country_Region, Date) %>%
  # Sum over provinces/states so each country has one row per date.
  summarise(across(c(Confirmed, Deaths), sum), .groups = "drop") %>%
  ggplot(aes(x = Date, y = Deaths)) +
  geom_point() +
  geom_line() +
  ggtitle("COVID-19 Deaths") +
  facet_wrap(~Country_Region, ncol = 2, scales = "free_y")

# 6
# Preview the joined confirmed/deaths table before plotting.
head(time_series_long_joined, n = 6L)
## # A tibble: 6 x 7
##   Province_State Country_Region   Lat  Long Date       Confirmed Deaths
##   <chr>          <chr>          <dbl> <dbl> <date>         <dbl>  <dbl>
## 1 <NA>           Afghanistan     33.9  67.7 2020-01-22         0      0
## 2 <NA>           Afghanistan     33.9  67.7 2020-01-23         0      0
## 3 <NA>           Afghanistan     33.9  67.7 2020-01-24         0      0
## 4 <NA>           Afghanistan     33.9  67.7 2020-01-25         0      0
## 5 <NA>           Afghanistan     33.9  67.7 2020-01-26         0      0
## 6 <NA>           Afghanistan     33.9  67.7 2020-01-27         0      0

Question 6A: Total Deaths

# US deaths over time on a log2 scale.
# across() replaces the superseded summarise_at(); .groups = "drop" returns an
# ungrouped table.
time_series_long_joined %>%
  filter(Country_Region == "US") %>%
  group_by(Country_Region, Date) %>%
  summarise(across(c(Confirmed, Deaths), sum), .groups = "drop") %>%
  # log2(0) is -Inf, so dates with a zero death count are left out of the plot.
  filter(Deaths > 0) %>%
  ggplot(aes(x = Date, y = log2(Deaths), color = Deaths)) +
  geom_point() +
  geom_line() +
  ggtitle("US COVID-19 Deaths per Day") +
  xlab("per Day") +
  ylab("log2(Total Deaths)")

Question 6B: Total Confirmed

# US confirmed cases over time on a log2 scale.
# across() replaces the superseded summarise_at(); .groups = "drop" returns an
# ungrouped table.
time_series_long_joined %>%
  filter(Country_Region == "US") %>%
  group_by(Country_Region, Date) %>%
  summarise(across(c(Confirmed, Deaths), sum), .groups = "drop") %>%
  # log2(0) is -Inf, so dates with a zero confirmed count are left out.
  filter(Confirmed > 0) %>%
  ggplot(aes(x = Date, y = log2(Confirmed), color = Confirmed)) +
  geom_point() +
  geom_line() +
  ggtitle("US COVID-19 Confirmed") +
  xlab("per Day") +
  # Axis label previously read "lohg2(...)".
  ylab("log2(Total Confirmed)")

# Deaths over time for six selected countries, one facet per country.
# Filtering first avoids aggregating countries that are discarded anyway;
# across() replaces the superseded summarise_at(), and .groups = "drop"
# returns an ungrouped table.
time_series_long_joined %>%
  filter(Country_Region %in% c("China", "Japan", "Korea, South",
                               "Italy", "Spain", "US")) %>%
  group_by(Country_Region, Date) %>%
  # Sum over provinces/states so each country has one row per date.
  summarise(across(c(Confirmed, Deaths), sum), .groups = "drop") %>%
  ggplot(aes(x = Date, y = Deaths)) +
  geom_point() +
  geom_line() +
  ggtitle("COVID-19 Deaths") +
  facet_wrap(~Country_Region, ncol = 2, scales = "free_y")

head(time_series_long_joined)
## # A tibble: 6 x 7
##   Province_State Country_Region   Lat  Long Date       Confirmed Deaths
##   <chr>          <chr>          <dbl> <dbl> <date>         <dbl>  <dbl>
## 1 <NA>           Afghanistan     33.9  67.7 2020-01-22         0      0
## 2 <NA>           Afghanistan     33.9  67.7 2020-01-23         0      0
## 3 <NA>           Afghanistan     33.9  67.7 2020-01-24         0      0
## 4 <NA>           Afghanistan     33.9  67.7 2020-01-25         0      0
## 5 <NA>           Afghanistan     33.9  67.7 2020-01-26         0      0
## 6 <NA>           Afghanistan     33.9  67.7 2020-01-27         0      0
# Ten countries with the most deaths.
# The JHU time series stores running totals, so adding the Deaths column over
# every date counts each death once per day since it was reported (that is how
# the earlier version arrived at > 21 million for the US). Only the most
# recent date is used; provinces/states are then summed per country.
# NOTE: any table printed from the earlier version needs to be regenerated.
top10_countries_time_series_long_joined <- time_series_long_joined %>%
  filter(Date == max(Date)) %>%
  group_by(Country_Region) %>%
  summarise(Deaths = sum(Deaths), .groups = "drop") %>%
  arrange(desc(Deaths)) %>%
  slice(1:10)
## `summarise()` ungrouping output (override with `.groups` argument)
# Display the top-10 table.
print(top10_countries_time_series_long_joined)
## # A tibble: 10 x 2
##    Country_Region   Deaths
##    <chr>             <dbl>
##  1 US             21282360
##  2 Brazil         10519674
##  3 United Kingdom  6156590
##  4 Italy           5726655
##  5 Mexico          5201319
##  6 France          4793527
##  7 Spain           4693751
##  8 India           4551683
##  9 Iran            2202060
## 10 Peru            2153636

Question 7

# Deaths over time for the ten countries in
# top10_countries_time_series_long_joined, all on one panel.
# The country list is taken from that table rather than typed in by hand, so
# the plot stays in step with the ranking when the downloaded data changes.
# across() replaces the superseded summarise_at().
time_series_long_joined %>%
  filter(
    Country_Region %in% top10_countries_time_series_long_joined$Country_Region
  ) %>%
  group_by(Country_Region, Date) %>%
  # Sum over provinces/states so each country has one row per date.
  summarise(across(c(Confirmed, Deaths), sum), .groups = "drop") %>%
  ggplot(aes(x = Date, y = Deaths, color = Country_Region)) +
  geom_point() +
  geom_line() +
  ggtitle("Countries with top 10 COVID-19 Deaths") +
  xlab("per Day") +
  ylab("Total Deaths")

head(time_series_long_joined)
## # A tibble: 6 x 7
##   Province_State Country_Region   Lat  Long Date       Confirmed Deaths
##   <chr>          <chr>          <dbl> <dbl> <date>         <dbl>  <dbl>
## 1 <NA>           Afghanistan     33.9  67.7 2020-01-22         0      0
## 2 <NA>           Afghanistan     33.9  67.7 2020-01-23         0      0
## 3 <NA>           Afghanistan     33.9  67.7 2020-01-24         0      0
## 4 <NA>           Afghanistan     33.9  67.7 2020-01-25         0      0
## 5 <NA>           Afghanistan     33.9  67.7 2020-01-26         0      0
## 6 <NA>           Afghanistan     33.9  67.7 2020-01-27         0      0

Question 8

# Deaths over time for the ten countries in
# top10_countries_time_series_long_joined, one facet per country with its own
# y scale.
# The country list is taken from that table rather than typed in by hand, so
# the plot stays in step with the ranking when the downloaded data changes.
# across() replaces the superseded summarise_at().
time_series_long_joined %>%
  filter(
    Country_Region %in% top10_countries_time_series_long_joined$Country_Region
  ) %>%
  group_by(Country_Region, Date) %>%
  # Sum over provinces/states so each country has one row per date.
  summarise(across(c(Confirmed, Deaths), sum), .groups = "drop") %>%
  ggplot(aes(x = Date, y = Deaths, color = Country_Region)) +
  geom_point() +
  geom_line() +
  ggtitle("Countries with top 10 COVID-19 Deaths") +
  facet_wrap(~Country_Region, ncol = 2, scales = "free_y") +
  theme(
    axis.text.x = element_text(colour = "red", size = 8, hjust = 2, vjust = 2),
    axis.text.y = element_text(colour = "red", size = 6),
    strip.text = element_text(face = "italic"),
    text = element_text(size = 15),
    axis.title.x = element_text(size = 20),
    axis.title.y = element_text(size = 20),
    plot.title = element_text(size = 20, face = "bold")
  )

# Load the US confirmed-cases time series (one column per date).
time_series_confirmed_USA <- read_csv(
  url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_US.csv")
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   iso2 = col_character(),
##   iso3 = col_character(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Combined_Key = col_character()
## )
## See spec(...) for full column specifications.
# Preview the wide US confirmed-cases table.
head(time_series_confirmed_USA, n = 6L)
## # A tibble: 6 x 256
##      UID iso2  iso3  code3  FIPS Admin2 Province_State Country_Region   Lat
##    <dbl> <chr> <chr> <dbl> <dbl> <chr>  <chr>          <chr>          <dbl>
## 1 8.40e7 US    USA     840  1001 Autau… Alabama        US              32.5
## 2 8.40e7 US    USA     840  1003 Baldw… Alabama        US              30.7
## 3 8.40e7 US    USA     840  1005 Barbo… Alabama        US              31.9
## 4 8.40e7 US    USA     840  1007 Bibb   Alabama        US              33.0
## 5 8.40e7 US    USA     840  1009 Blount Alabama        US              34.0
## 6 8.40e7 US    USA     840  1011 Bullo… Alabama        US              32.1
## # … with 247 more variables: Long_ <dbl>, Combined_Key <chr>, `1/22/20` <dbl>,
## #   `1/23/20` <dbl>, `1/24/20` <dbl>, `1/25/20` <dbl>, `1/26/20` <dbl>,
## #   `1/27/20` <dbl>, `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>,
## #   `1/31/20` <dbl>, `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>,
## #   `2/4/20` <dbl>, `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>,
## #   `2/8/20` <dbl>, `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>,
## #   `2/12/20` <dbl>, `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>,
## #   `2/16/20` <dbl>, `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>,
## #   `2/20/20` <dbl>, `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>,
## #   `2/24/20` <dbl>, `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>,
## #   `2/28/20` <dbl>, `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>,
## #   `3/3/20` <dbl>, `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>,
## #   `3/7/20` <dbl>, `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>,
## #   `3/11/20` <dbl>, `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>,
## #   `3/15/20` <dbl>, `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>,
## #   `3/19/20` <dbl>, `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>,
## #   `3/23/20` <dbl>, `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>,
## #   `3/27/20` <dbl>, `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>,
## #   `3/31/20` <dbl>, `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>,
## #   `4/4/20` <dbl>, `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>,
## #   `4/8/20` <dbl>, `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>,
## #   `4/12/20` <dbl>, `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>,
## #   `4/16/20` <dbl>, `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>,
## #   `4/20/20` <dbl>, `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>,
## #   `4/24/20` <dbl>, `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>,
## #   `4/28/20` <dbl>, …
# Print the table grouped by Province_State. The grouped result is only
# displayed, not assigned, so time_series_confirmed_USA itself stays ungrouped.
group_by(time_series_confirmed_USA, Province_State)
## # A tibble: 3,340 x 256
## # Groups:   Province_State [58]
##       UID iso2  iso3  code3  FIPS Admin2 Province_State Country_Region   Lat
##     <dbl> <chr> <chr> <dbl> <dbl> <chr>  <chr>          <chr>          <dbl>
##  1 8.40e7 US    USA     840  1001 Autau… Alabama        US              32.5
##  2 8.40e7 US    USA     840  1003 Baldw… Alabama        US              30.7
##  3 8.40e7 US    USA     840  1005 Barbo… Alabama        US              31.9
##  4 8.40e7 US    USA     840  1007 Bibb   Alabama        US              33.0
##  5 8.40e7 US    USA     840  1009 Blount Alabama        US              34.0
##  6 8.40e7 US    USA     840  1011 Bullo… Alabama        US              32.1
##  7 8.40e7 US    USA     840  1013 Butler Alabama        US              31.8
##  8 8.40e7 US    USA     840  1015 Calho… Alabama        US              33.8
##  9 8.40e7 US    USA     840  1017 Chamb… Alabama        US              32.9
## 10 8.40e7 US    USA     840  1019 Chero… Alabama        US              34.2
## # … with 3,330 more rows, and 247 more variables: Long_ <dbl>,
## #   Combined_Key <chr>, `1/22/20` <dbl>, `1/23/20` <dbl>, `1/24/20` <dbl>,
## #   `1/25/20` <dbl>, `1/26/20` <dbl>, `1/27/20` <dbl>, `1/28/20` <dbl>,
## #   `1/29/20` <dbl>, `1/30/20` <dbl>, `1/31/20` <dbl>, `2/1/20` <dbl>,
## #   `2/2/20` <dbl>, `2/3/20` <dbl>, `2/4/20` <dbl>, `2/5/20` <dbl>,
## #   `2/6/20` <dbl>, `2/7/20` <dbl>, `2/8/20` <dbl>, `2/9/20` <dbl>,
## #   `2/10/20` <dbl>, `2/11/20` <dbl>, `2/12/20` <dbl>, `2/13/20` <dbl>,
## #   `2/14/20` <dbl>, `2/15/20` <dbl>, `2/16/20` <dbl>, `2/17/20` <dbl>,
## #   `2/18/20` <dbl>, `2/19/20` <dbl>, `2/20/20` <dbl>, `2/21/20` <dbl>,
## #   `2/22/20` <dbl>, `2/23/20` <dbl>, `2/24/20` <dbl>, `2/25/20` <dbl>,
## #   `2/26/20` <dbl>, `2/27/20` <dbl>, `2/28/20` <dbl>, `2/29/20` <dbl>,
## #   `3/1/20` <dbl>, `3/2/20` <dbl>, `3/3/20` <dbl>, `3/4/20` <dbl>,
## #   `3/5/20` <dbl>, `3/6/20` <dbl>, `3/7/20` <dbl>, `3/8/20` <dbl>,
## #   `3/9/20` <dbl>, `3/10/20` <dbl>, `3/11/20` <dbl>, `3/12/20` <dbl>,
## #   `3/13/20` <dbl>, `3/14/20` <dbl>, `3/15/20` <dbl>, `3/16/20` <dbl>,
## #   `3/17/20` <dbl>, `3/18/20` <dbl>, `3/19/20` <dbl>, `3/20/20` <dbl>,
## #   `3/21/20` <dbl>, `3/22/20` <dbl>, `3/23/20` <dbl>, `3/24/20` <dbl>,
## #   `3/25/20` <dbl>, `3/26/20` <dbl>, `3/27/20` <dbl>, `3/28/20` <dbl>,
## #   `3/29/20` <dbl>, `3/30/20` <dbl>, `3/31/20` <dbl>, `4/1/20` <dbl>,
## #   `4/2/20` <dbl>, `4/3/20` <dbl>, `4/4/20` <dbl>, `4/5/20` <dbl>,
## #   `4/6/20` <dbl>, `4/7/20` <dbl>, `4/8/20` <dbl>, `4/9/20` <dbl>,
## #   `4/10/20` <dbl>, `4/11/20` <dbl>, `4/12/20` <dbl>, `4/13/20` <dbl>,
## #   `4/14/20` <dbl>, `4/15/20` <dbl>, `4/16/20` <dbl>, `4/17/20` <dbl>,
## #   `4/18/20` <dbl>, `4/19/20` <dbl>, `4/20/20` <dbl>, `4/21/20` <dbl>,
## #   `4/22/20` <dbl>, `4/23/20` <dbl>, `4/24/20` <dbl>, `4/25/20` <dbl>,
## #   `4/26/20` <dbl>, `4/27/20` <dbl>, `4/28/20` <dbl>, …
# Preview the first six rows of the wide table (one column per reporting date).
head(time_series_confirmed_USA, n = 6L)
## # A tibble: 6 x 256
##      UID iso2  iso3  code3  FIPS Admin2 Province_State Country_Region   Lat
##    <dbl> <chr> <chr> <dbl> <dbl> <chr>  <chr>          <chr>          <dbl>
## 1 8.40e7 US    USA     840  1001 Autau… Alabama        US              32.5
## 2 8.40e7 US    USA     840  1003 Baldw… Alabama        US              30.7
## 3 8.40e7 US    USA     840  1005 Barbo… Alabama        US              31.9
## 4 8.40e7 US    USA     840  1007 Bibb   Alabama        US              33.0
## 5 8.40e7 US    USA     840  1009 Blount Alabama        US              34.0
## 6 8.40e7 US    USA     840  1011 Bullo… Alabama        US              32.1
## # … with 247 more variables: Long_ <dbl>, Combined_Key <chr>, `1/22/20` <dbl>,
## #   `1/23/20` <dbl>, `1/24/20` <dbl>, `1/25/20` <dbl>, `1/26/20` <dbl>,
## #   `1/27/20` <dbl>, `1/28/20` <dbl>, `1/29/20` <dbl>, `1/30/20` <dbl>,
## #   `1/31/20` <dbl>, `2/1/20` <dbl>, `2/2/20` <dbl>, `2/3/20` <dbl>,
## #   `2/4/20` <dbl>, `2/5/20` <dbl>, `2/6/20` <dbl>, `2/7/20` <dbl>,
## #   `2/8/20` <dbl>, `2/9/20` <dbl>, `2/10/20` <dbl>, `2/11/20` <dbl>,
## #   `2/12/20` <dbl>, `2/13/20` <dbl>, `2/14/20` <dbl>, `2/15/20` <dbl>,
## #   `2/16/20` <dbl>, `2/17/20` <dbl>, `2/18/20` <dbl>, `2/19/20` <dbl>,
## #   `2/20/20` <dbl>, `2/21/20` <dbl>, `2/22/20` <dbl>, `2/23/20` <dbl>,
## #   `2/24/20` <dbl>, `2/25/20` <dbl>, `2/26/20` <dbl>, `2/27/20` <dbl>,
## #   `2/28/20` <dbl>, `2/29/20` <dbl>, `3/1/20` <dbl>, `3/2/20` <dbl>,
## #   `3/3/20` <dbl>, `3/4/20` <dbl>, `3/5/20` <dbl>, `3/6/20` <dbl>,
## #   `3/7/20` <dbl>, `3/8/20` <dbl>, `3/9/20` <dbl>, `3/10/20` <dbl>,
## #   `3/11/20` <dbl>, `3/12/20` <dbl>, `3/13/20` <dbl>, `3/14/20` <dbl>,
## #   `3/15/20` <dbl>, `3/16/20` <dbl>, `3/17/20` <dbl>, `3/18/20` <dbl>,
## #   `3/19/20` <dbl>, `3/20/20` <dbl>, `3/21/20` <dbl>, `3/22/20` <dbl>,
## #   `3/23/20` <dbl>, `3/24/20` <dbl>, `3/25/20` <dbl>, `3/26/20` <dbl>,
## #   `3/27/20` <dbl>, `3/28/20` <dbl>, `3/29/20` <dbl>, `3/30/20` <dbl>,
## #   `3/31/20` <dbl>, `4/1/20` <dbl>, `4/2/20` <dbl>, `4/3/20` <dbl>,
## #   `4/4/20` <dbl>, `4/5/20` <dbl>, `4/6/20` <dbl>, `4/7/20` <dbl>,
## #   `4/8/20` <dbl>, `4/9/20` <dbl>, `4/10/20` <dbl>, `4/11/20` <dbl>,
## #   `4/12/20` <dbl>, `4/13/20` <dbl>, `4/14/20` <dbl>, `4/15/20` <dbl>,
## #   `4/16/20` <dbl>, `4/17/20` <dbl>, `4/18/20` <dbl>, `4/19/20` <dbl>,
## #   `4/20/20` <dbl>, `4/21/20` <dbl>, `4/22/20` <dbl>, `4/23/20` <dbl>,
## #   `4/24/20` <dbl>, `4/25/20` <dbl>, `4/26/20` <dbl>, `4/27/20` <dbl>,
## #   `4/28/20` <dbl>, …
# Reshape the wide table to long format in a single pipeline:
#   1. drop the identifier and geography columns, keeping Province_State and
#      the per-date count columns;
#   2. stack every remaining non-state column into Date / Confirmed pairs.
# Date holds the original column names, so it is a character column here.
time_series_confirmed_USA <- time_series_confirmed_USA %>%
  select(
    -c(UID, iso2, iso3, code3, FIPS, Admin2, Combined_Key,
       Country_Region, Lat, Long_)
  ) %>%
  pivot_longer(
    cols = -Province_State,
    names_to = "Date",
    values_to = "Confirmed"
  )

# Preview the first six rows of the long table (Province_State, Date, Confirmed).
time_series_confirmed_USA %>%
  head()
## # A tibble: 6 x 3
##   Province_State Date    Confirmed
##   <chr>          <chr>       <dbl>
## 1 Alabama        1/22/20         0
## 2 Alabama        1/23/20         0
## 3 Alabama        1/24/20         0
## 4 Alabama        1/25/20         0
## 5 Alabama        1/26/20         0
## 6 Alabama        1/27/20         0

Question 9

# Plot confirmed cases over time, one facet per Province_State.
#
# Two preparation steps are needed before plotting:
#   * Date is still a character column of "m/d/yy" strings after pivoting.
#     Left as text, ggplot treats the x axis as discrete (ordered as text, not
#     chronologically) and geom_line() cannot connect the points ("Each group
#     consists of only one observation"). Parse it into a real Date first.
#   * The table still has one row per county per day (only the Admin2 column
#     was dropped), so sum Confirmed within each state and day to get a single
#     series per state.
time_series_confirmed_USA %>%
  mutate(Date = as.Date(Date, format = "%m/%d/%y")) %>%
  group_by(Province_State, Date) %>%
  summarise(Confirmed = sum(Confirmed), .groups = "drop") %>%
  ggplot(aes(x = Date, y = Confirmed, color = Province_State)) +
    geom_point() +
    geom_line() +
    ggtitle("Confirmed Cases in US States") +
    theme(
      axis.text.x = element_text(
        colour = "black", size = 30, angle = 180, hjust = 2, vjust = 2
      ),
      axis.text.y = element_text(colour = "black", size = 30),
      strip.text = element_text(face = "italic"),
      text = element_text(size = 30),
      axis.title.x = element_text(size = 80),
      axis.title.y = element_text(size = 80),
      plot.title = element_text(size = 125, face = "bold")
    ) +
    # Each facet gets its own y scale because case counts differ widely by state.
    facet_wrap(~Province_State, ncol = 2, scales = "free_y") +
    ylab("total Confirmed") +
    xlab("per Day")
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?